{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "zX4Kg8DUTKWO"
   },
   "outputs": [],
   "source": [
    "# 模拟神经网络输入数据的生成\n",
    "\n",
    "\n",
    "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    "# you may not use this file except in compliance with the License.\n",
    "# You may obtain a copy of the License at\n",
    "#\n",
    "# https://www.apache.org/licenses/LICENSE-2.0\n",
    "#\n",
    "# Unless required by applicable law or agreed to in writing, software\n",
    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    "# See the License for the specific language governing permissions and\n",
    "# limitations under the License."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "s6eq-RBcQ_Zr"
   },
   "outputs": [],
   "source": [
    "try:\n",
    "  # %tensorflow_version only exists in Colab.\n",
    "  %tensorflow_version 2.x\n",
    "except Exception:\n",
    "  pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "BOjujz601HcS"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.2.0\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "print(tf.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "asEdslR_05O_"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n"
     ]
    }
   ],
   "source": [
    "# 生成序列数据\n",
    "dataset = tf.data.Dataset.range(10)\n",
    "for val in dataset:\n",
    "   print(val.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Lrv_ghSt1lgQ"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2 3 4 \n",
      "1 2 3 4 5 \n",
      "2 3 4 5 6 \n",
      "3 4 5 6 7 \n",
      "4 5 6 7 8 \n",
      "5 6 7 8 9 \n",
      "6 7 8 9 \n",
      "7 8 9 \n",
      "8 9 \n",
      "9 \n"
     ]
    }
   ],
   "source": [
    "# 获得窗口数据，窗口大小为5\n",
    "dataset = tf.data.Dataset.range(10)\n",
    "dataset = dataset.window(5, shift=1)\n",
    "for window_dataset in dataset:\n",
    "  for val in window_dataset:\n",
    "    print(val.numpy(), end=\" \")\n",
    "  print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "QLEq6MG-2DN2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2 3 4 \n",
      "1 2 3 4 5 \n",
      "2 3 4 5 6 \n",
      "3 4 5 6 7 \n",
      "4 5 6 7 8 \n",
      "5 6 7 8 9 \n"
     ]
    }
   ],
   "source": [
    "# 去掉不完整的数据\n",
    "dataset = tf.data.Dataset.range(10)\n",
    "dataset = dataset.window(5, shift=1, drop_remainder=True)\n",
    "for window_dataset in dataset:\n",
    "  for val in window_dataset:\n",
    "    print(val.numpy(), end=\" \")\n",
    "  print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "PJ9CAHlJ2ODe"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 2 3 4]\n",
      "[1 2 3 4 5]\n",
      "[2 3 4 5 6]\n",
      "[3 4 5 6 7]\n",
      "[4 5 6 7 8]\n",
      "[5 6 7 8 9]\n"
     ]
    }
   ],
   "source": [
    "# 转为numpy列表\n",
    "dataset = tf.data.Dataset.range(10)\n",
    "dataset = dataset.window(5, shift=1, drop_remainder=True)\n",
    "dataset = dataset.flat_map(lambda window: window.batch(5))\n",
    "for window in dataset:\n",
    "  print(window.numpy())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "DryEZ2Mz2nNV"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 2 3] [4]\n",
      "[1 2 3 4] [5]\n",
      "[2 3 4 5] [6]\n",
      "[3 4 5 6] [7]\n",
      "[4 5 6 7] [8]\n",
      "[5 6 7 8] [9]\n"
     ]
    }
   ],
   "source": [
    "# 打散数据\n",
    "dataset = tf.data.Dataset.range(10)\n",
    "dataset = dataset.window(5, shift=1, drop_remainder=True)\n",
    "dataset = dataset.flat_map(lambda window: window.batch(5))\n",
    "dataset = dataset.map(lambda window: (window[:-1], window[-1:]))\n",
    "for x,y in dataset:\n",
    "  print(x.numpy(), y.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "1tl-0BOKkEtk"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[3 4 5 6] [7]\n",
      "[4 5 6 7] [8]\n",
      "[1 2 3 4] [5]\n",
      "[2 3 4 5] [6]\n",
      "[0 1 2 3] [4]\n",
      "[5 6 7 8] [9]\n"
     ]
    }
   ],
   "source": [
    "dataset = tf.data.Dataset.range(10)\n",
    "dataset = dataset.window(5, shift=1, drop_remainder=True)\n",
    "dataset = dataset.flat_map(lambda window: window.batch(5))\n",
    "dataset = dataset.map(lambda window: (window[:-1], window[-1:]))\n",
    "dataset = dataset.shuffle(buffer_size=10)\n",
    "for x,y in dataset:\n",
    "  print(x.numpy(), y.numpy())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Wa0PNwxMGapy"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x =  [[5 6 7 8]\n",
      " [1 2 3 4]]\n",
      "y =  [[9]\n",
      " [5]]\n",
      "x =  [[4 5 6 7]\n",
      " [2 3 4 5]]\n",
      "y =  [[8]\n",
      " [6]]\n",
      "x =  [[3 4 5 6]\n",
      " [0 1 2 3]]\n",
      "y =  [[7]\n",
      " [4]]\n"
     ]
    }
   ],
   "source": [
    "# 设置数据批量，每两个数据为一批次\n",
    "dataset = tf.data.Dataset.range(10)\n",
    "dataset = dataset.window(5, shift=1, drop_remainder=True)\n",
    "dataset = dataset.flat_map(lambda window: window.batch(5))\n",
    "dataset = dataset.map(lambda window: (window[:-1], window[-1:]))\n",
    "dataset = dataset.shuffle(buffer_size=10)\n",
    "\n",
    "for x,y in dataset:\n",
    "  print(\"x = \", x.numpy())\n",
    "  print(\"y = \", y.numpy())\n"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "name": "S+P Week 2 Lesson 1.ipynb",
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
